{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<h1 style=\"color:blue\" align=\"center\">Pandas Time Series Analysis Tutorial: Handling Holidays</h1>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {
    "collapsed": false,
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Open</th>\n",
       "      <th>High</th>\n",
       "      <th>Low</th>\n",
       "      <th>Close</th>\n",
       "      <th>Volume</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>144.88</td>\n",
       "      <td>145.30</td>\n",
       "      <td>143.10</td>\n",
       "      <td>143.50</td>\n",
       "      <td>14277848</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>143.69</td>\n",
       "      <td>144.79</td>\n",
       "      <td>142.72</td>\n",
       "      <td>144.09</td>\n",
       "      <td>21569557</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>143.02</td>\n",
       "      <td>143.50</td>\n",
       "      <td>142.41</td>\n",
       "      <td>142.73</td>\n",
       "      <td>24128782</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>142.90</td>\n",
       "      <td>144.75</td>\n",
       "      <td>142.90</td>\n",
       "      <td>144.18</td>\n",
       "      <td>19201712</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>144.11</td>\n",
       "      <td>145.95</td>\n",
       "      <td>143.37</td>\n",
       "      <td>145.06</td>\n",
       "      <td>21090636</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Open    High     Low   Close    Volume\n",
       "0  144.88  145.30  143.10  143.50  14277848\n",
       "1  143.69  144.79  142.72  144.09  21569557\n",
       "2  143.02  143.50  142.41  142.73  24128782\n",
       "3  142.90  144.75  142.90  144.18  19201712\n",
       "4  144.11  145.95  143.37  145.06  21090636"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "df = pd.read_csv(\"aapl_no_dates.csv\")\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DatetimeIndex(['2017-07-03', '2017-07-04', '2017-07-05', '2017-07-06',\n",
       "               '2017-07-07', '2017-07-10', '2017-07-11', '2017-07-12',\n",
       "               '2017-07-13', '2017-07-14', '2017-07-17', '2017-07-18',\n",
       "               '2017-07-19', '2017-07-20', '2017-07-21'],\n",
       "              dtype='datetime64[ns]', freq='B')"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rng = pd.date_range(start=\"7/1/2017\", end=\"7/21/2017\", freq='B')\n",
    "rng"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Using 'B' frequency is not going to help because 4th July was holiday and 'B' is not taking that into account. \n",
    "It only accounts for weekends**"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<h3 style=\"color:purple\">Using CustomBusinessDay to generate US holidays calendar frequency</h3>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DatetimeIndex(['2017-07-03', '2017-07-05', '2017-07-06', '2017-07-07',\n",
       "               '2017-07-10', '2017-07-11', '2017-07-12', '2017-07-13',\n",
       "               '2017-07-14', '2017-07-17', '2017-07-18', '2017-07-19',\n",
       "               '2017-07-20', '2017-07-21'],\n",
       "              dtype='datetime64[ns]', freq='C')"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from pandas.tseries.holiday import USFederalHolidayCalendar\n",
    "from pandas.tseries.offsets import CustomBusinessDay\n",
    "\n",
    "us_cal = CustomBusinessDay(calendar=USFederalHolidayCalendar())\n",
    "\n",
    "rng = pd.date_range(start=\"7/1/2017\",end=\"7/23/2017\", freq=us_cal)\n",
    "rng"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {
    "collapsed": false,
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Open</th>\n",
       "      <th>High</th>\n",
       "      <th>Low</th>\n",
       "      <th>Close</th>\n",
       "      <th>Volume</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2017-07-03</th>\n",
       "      <td>144.88</td>\n",
       "      <td>145.30</td>\n",
       "      <td>143.10</td>\n",
       "      <td>143.50</td>\n",
       "      <td>14277848</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2017-07-05</th>\n",
       "      <td>143.69</td>\n",
       "      <td>144.79</td>\n",
       "      <td>142.72</td>\n",
       "      <td>144.09</td>\n",
       "      <td>21569557</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2017-07-06</th>\n",
       "      <td>143.02</td>\n",
       "      <td>143.50</td>\n",
       "      <td>142.41</td>\n",
       "      <td>142.73</td>\n",
       "      <td>24128782</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2017-07-07</th>\n",
       "      <td>142.90</td>\n",
       "      <td>144.75</td>\n",
       "      <td>142.90</td>\n",
       "      <td>144.18</td>\n",
       "      <td>19201712</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2017-07-10</th>\n",
       "      <td>144.11</td>\n",
       "      <td>145.95</td>\n",
       "      <td>143.37</td>\n",
       "      <td>145.06</td>\n",
       "      <td>21090636</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              Open    High     Low   Close    Volume\n",
       "2017-07-03  144.88  145.30  143.10  143.50  14277848\n",
       "2017-07-05  143.69  144.79  142.72  144.09  21569557\n",
       "2017-07-06  143.02  143.50  142.41  142.73  24128782\n",
       "2017-07-07  142.90  144.75  142.90  144.18  19201712\n",
       "2017-07-10  144.11  145.95  143.37  145.06  21090636"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.set_index(rng,inplace=True)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**You can define your own calendar using AbstractHolidayCalendar as shown below. USFederalHolidayCalendar is the only calendar available in pandas library and it serves as an example for those who want to write their own custom calendars. Here is the link for USFederalHolidayCalendar implementation** https://github.com/pandas-dev/pandas/blob/master/pandas/tseries/holiday.py"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<h3 style=\"color:purple\">AbstractHolidayCalendar</h3>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DatetimeIndex(['2017-04-03', '2017-04-04', '2017-04-05', '2017-04-06',\n",
       "               '2017-04-07', '2017-04-10', '2017-04-11', '2017-04-12',\n",
       "               '2017-04-13', '2017-04-14', '2017-04-17', '2017-04-18',\n",
       "               '2017-04-19', '2017-04-20', '2017-04-21', '2017-04-24',\n",
       "               '2017-04-25', '2017-04-26', '2017-04-27', '2017-04-28'],\n",
       "              dtype='datetime64[ns]', freq='C')"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from pandas.tseries.holiday import AbstractHolidayCalendar, nearest_workday, Holiday\n",
    "class myCalendar(AbstractHolidayCalendar):\n",
    "    rules = [\n",
    "        Holiday('My Birth Day', month=4, day=15),#, observance=nearest_workday),\n",
    "    ]\n",
    "    \n",
    "my_bday = CustomBusinessDay(calendar=myCalendar())\n",
    "pd.date_range('4/1/2017','4/30/2017',freq=my_bday)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<h3 style=\"color:purple\">CustomBusinessDay</h3>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Weekend in egypt is Friday and Saturday. Sunday is just a normal weekday and you can handle this custom week schedule using\n",
    "CystomBysinessDay with weekmask as shown below**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DatetimeIndex(['2017-07-02', '2017-07-03', '2017-07-04', '2017-07-05',\n",
       "               '2017-07-06', '2017-07-09', '2017-07-10', '2017-07-11',\n",
       "               '2017-07-12', '2017-07-13', '2017-07-16', '2017-07-17',\n",
       "               '2017-07-18', '2017-07-19', '2017-07-20', '2017-07-23',\n",
       "               '2017-07-24', '2017-07-25', '2017-07-26', '2017-07-27'],\n",
       "              dtype='datetime64[ns]', freq='C')"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "egypt_weekdays = \"Sun Mon Tue Wed Thu\"\n",
    "\n",
    "b = CustomBusinessDay(weekmask=egypt_weekdays)\n",
    "\n",
    "pd.date_range(start=\"7/1/2017\",periods=20,freq=b)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**You can also add holidays to this custom business day frequency**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DatetimeIndex(['2017-07-02', '2017-07-03', '2017-07-05', '2017-07-06',\n",
       "               '2017-07-09', '2017-07-11', '2017-07-12', '2017-07-13',\n",
       "               '2017-07-16', '2017-07-17', '2017-07-18', '2017-07-19',\n",
       "               '2017-07-20', '2017-07-23', '2017-07-24', '2017-07-25',\n",
       "               '2017-07-26', '2017-07-27', '2017-07-30', '2017-07-31'],\n",
       "              dtype='datetime64[ns]', freq='C')"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b = CustomBusinessDay(holidays=['2017-07-04', '2017-07-10'], weekmask=egypt_weekdays)\n",
    "\n",
    "pd.date_range(start=\"7/1/2017\",periods=20,freq=b)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Mathematical operations on date object using custom business day**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "datetime.datetime(2017, 7, 9, 0, 0)"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from datetime import datetime\n",
    "dt = datetime(2017,7,9)\n",
    "dt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Timestamp('2017-07-11 00:00:00')"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dt + 1*b"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [conda root]",
   "language": "python",
   "name": "conda-root-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
